New chunk Ctrl+Alt+I
Execute chunk Ctrl+Shift+Enter
Execute all chunks Ctrl+Alt+R
HTML preview Ctrl+Shift+K
library(readr)
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2 ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(ggplot2)
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
library(stats)
library(tm)
Loading required package: NLP
Attaching package: ‘NLP’
The following object is masked from ‘package:ggplot2’:
annotate
library(text2vec)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(textstem)
Loading required package: koRpus.lang.en
Loading required package: koRpus
Loading required package: sylly
For information on available language packages for 'koRpus', run
available.koRpus.lang()
and see ?install.koRpus.lang()
Attaching package: ‘koRpus’
The following object is masked from ‘package:tm’:
readTagged
The following object is masked from ‘package:readr’:
tokenize
library(syuzhet)
data_posts <- read.csv("~/4year/2semester/dtII/CSVs/HEIs.csv",
colClasses = c(tweet_id = "character"))
# Modifying created_at type so that attribute can be used more easily
data_posts$created_at <- as.POSIXct(data_posts$created_at,
format= "%Y-%m-%dT%H:%M:%S", tz="UTC")
#View(data)
summary(data_posts)
id tweet_id text type bookmark_count favorite_count retweet_count reply_count
Length:11728 Length:11728 Length:11728 Length:11728 Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
Class :character Class :character Class :character Class :character 1st Qu.: 0.000 1st Qu.: 7.00 1st Qu.: 2.00 1st Qu.: 0.000
Mode :character Mode :character Mode :character Mode :character Median : 0.000 Median : 20.00 Median : 5.00 Median : 1.000
Mean : 1.543 Mean : 60.67 Mean : 10.62 Mean : 3.888
3rd Qu.: 1.000 3rd Qu.: 57.00 3rd Qu.: 11.00 3rd Qu.: 3.000
Max. :418.000 Max. :41655.00 Max. :4214.00 Max. :2317.000
view_count created_at hashtags urls media_type media_urls
Min. : 5 Min. :2022-08-01 03:05:11.00 Length:11728 Length:11728 Length:11728 Length:11728
1st Qu.: 2643 1st Qu.:2022-10-19 12:56:27.00 Class :character Class :character Class :character Class :character
Median : 6240 Median :2023-01-29 08:26:30.00 Mode :character Mode :character Mode :character Mode :character
Mean : 14182 Mean :2023-01-30 07:39:34.96
3rd Qu.: 16058 3rd Qu.:2023-05-05 14:16:43.25
Max. :7604544 Max. :2023-08-31 20:50:01.00
NA's :4840
# Count of how many entries each HEI has
number_posts <- data_posts %>%
group_by(id) %>% summarise(count = n())
number_posts
data_posts <- data_posts[data_posts$id != "complutense.csv", ]
number_posts <- data_posts %>%
group_by(id) %>% summarise(posts = n())
number_tweets <- data_posts[data_posts$type == "Tweet", ] %>%
group_by(id) %>% summarise(tweets = n())
number_replies <- data_posts[data_posts$type == "Reply", ] %>%
group_by(id) %>% summarise(replies = n())
print(number_posts)
print(number_tweets)
print(number_replies)
# Merging the counts of tweets and replies with the count of posts
data_ratio <- merge(number_posts, number_tweets, by = "id", all = TRUE)
data_ratio <- merge(data_ratio, number_replies, by = "id", all = TRUE)
data_ratio$percentage_tweets <- round(((data_ratio$tweets / data_ratio$posts) * 100), 2)
data_ratio$percentage_replies <- round(((data_ratio$replies / data_ratio$posts) * 100), 2)
data_ratio <- data_ratio[, c("id", "percentage_tweets", "percentage_replies")]
data_ratio$percentage_replies[is.na(data_ratio$percentage_replies)] <- 0
print(data_ratio)
na_count <- function(){
# Counting the number of NA values for each column
na_count <- colSums(is.na(data_posts))
# Creating a new data frame with the NA counts
na_counts_table <- data.frame(Column = names(na_count), NA_Count = na_count)
print(na_counts_table)
}
data_posts <- data_posts %>%
group_by(id) %>%
mutate(view_percentile = ntile(view_count, 100),
favorite_percentile = ntile(favorite_count, 100),
retweet_percentile = ntile(retweet_count, 100),
reply_percentile = ntile(reply_count, 100)) %>%
rowwise() %>%
mutate(avg_percentile = round(mean(c(view_percentile, favorite_percentile, retweet_percentile, reply_percentile), na.rm = TRUE), 2))
na_count()
data_percentile <- data_posts[, c("id", "view_percentile", "favorite_percentile", "retweet_percentile", "reply_percentile", "avg_percentile")]
print(data_percentile)
max_view_counts <- tapply(data_posts$view_count, data_posts$id, max, na.rm = TRUE)
print(max_view_counts)
duke.csv epfl.csv goe.csv harvard.csv leicester.csv manchester.csv mit.csv sb.csv stanford.csv trinity.csv
307969 105095 15455 2982704 47838 317086 7604544 607498 222593 205333
wv.csv yale.csv
109265 143108
# From view count
data_posts$view_count <- ifelse(
is.na(data_posts$view_count),
round(max_view_counts[data_posts$id] * (data_posts$avg_percentile / 100)),
data_posts$view_count)
# From view percentile
data_posts$view_percentile <- ifelse(
is.na(data_posts$view_percentile),
data_posts$avg_percentile,
data_posts$view_percentile)
na_count()
average_posts <- function(timeframe){
# Calculation of the timeframe between earliest and latest post for each HEI
date_range <- data_posts %>%
group_by(id) %>%
summarise(min_date = min(created_at),
max_date = max(created_at)) %>%
mutate(num_days = as.numeric(difftime(max_date, min_date, units = timeframe)))
# Naming the column respecting the timeframe
column_name <- paste0("avg_posts_per_", timeframe)
# Calculation of the number of posts per day for each HEI
posts_per_timeframe <- number_posts %>%
left_join(date_range, by = "id") %>%
mutate(!!column_name := round((posts / num_days), 2))
print(posts_per_timeframe)
return(posts_per_timeframe)
}
posts_per_day <- average_posts("days")
posts_per_week <- average_posts("weeks")
barplot(posts_per_day$avg_posts_per_days,
names.arg = posts_per_day$id,
main = "Average Posts per Day",
xlab = "HEI",
ylab = "Average Number of Posts",
ylim = c(0, max(posts_per_day$avg_posts_per_days) + 1),
las = 2,
col = "#3498DB")
# Adding text labels over each bar and aligning it with the center of each bar
text(x = barplot(posts_per_day$avg_posts_per_days, plot = FALSE),
y = posts_per_day$avg_posts_per_days,
labels = round(posts_per_day$avg_posts_per_days, 2),
pos = 3)
barplot(posts_per_week$avg_posts_per_weeks,
names.arg = posts_per_week$id,
main = "Average Posts per Week",
xlab = "HEI",
ylab = "Average Number of Posts",
ylim = c(0, max(posts_per_week$avg_posts_per_weeks) + 5),
las = 2,
col = "#E74C3C")
text(x = barplot(posts_per_week$avg_posts_per_weeks, plot = FALSE),
y = posts_per_week$avg_posts_per_weeks,
labels = round(posts_per_week$avg_posts_per_weeks, 2),
pos = 3)
intervals <- list(
interval1 = as.POSIXct(c("2022-08-31", "2022-12-15")),
interval2 = as.POSIXct(c("2023-01-04", "2023-04-01")),
interval3 = as.POSIXct(c("2023-04-14", "2023-06-15"))
)
check_interval <- function(date) {
for (i in 1:length(intervals)) {
interval_start <- intervals[[i]][1]
interval_end <- intervals[[i]][2]
if (date >= interval_start & date <= interval_end) {
return(TRUE)
}
}
return(FALSE)
}
data_posts$academic_year <- sapply(data_posts$created_at, check_interval)
print(data.frame(id = data_posts$id, academic_year = data_posts$academic_year))
analyze_posts <- function(academic_year_filter) {
# Filtering the data based on the academic_year_filter
filtered_data <- data_posts %>%
filter(academic_year == academic_year_filter)
# Count of days for each HEI
unique_days <- filtered_data %>%
group_by(id) %>%
summarise(unique_days = n_distinct(as.Date(created_at)))
# Count of posts for each HEI
number_posts_boolean <- filtered_data %>%
group_by(id) %>%
summarise(count = n())
# Naming the column respecting the time period
time <- ifelse(academic_year_filter, "academic_time", "vacation_time")
column_name <- paste0("avg_posts_in_", time)
# Combination of data and calculation of average posts per day
combined_data <- left_join(unique_days, number_posts_boolean, by = "id")
combined_data <- combined_data %>%
mutate(!!column_name := round((count / unique_days), 2))
print(combined_data)
return(combined_data)
}
data_posts_academic <- analyze_posts(TRUE)
data_posts_vacations <- analyze_posts(FALSE)
barplot(data_posts_academic$avg_posts_in_academic_time,
names.arg = data_posts_academic$id,
main = "Average Posts during Academic Time",
xlab = "HEI",
ylab = "Average Number of Posts",
ylim = c(0, max(data_posts_academic$avg_posts_in_academic_time) + 5),
las = 2,
col = "#34495E")
text(x = barplot(data_posts_academic$avg_posts_in_academic_time, plot = FALSE),
y = data_posts_academic$avg_posts_in_academic_time,
labels = round(data_posts_academic$avg_posts_in_academic_time, 2),
pos = 3)
barplot(data_posts_vacations$avg_posts_in_vacation_time,
names.arg = data_posts_vacations$id,
main = "Average Posts during Vacation Time",
xlab = "HEI",
ylab = "Average Number of Posts",
ylim = c(0, max(data_posts_vacations$avg_posts_in_vacation_time) + 5),
las = 2,
col = "#D35400")
text(x = barplot(data_posts_vacations$avg_posts_in_vacation_time, plot = FALSE),
y = data_posts_vacations$avg_posts_in_vacation_time,
labels = round(data_posts_vacations$avg_posts_in_vacation_time, 2),
pos = 3)
# Creating new table that contains a new column for the day of the week
data_posts_days <- data_posts %>%
mutate(day_of_week = weekdays(created_at))
# Selecting only the id, created_at, and day_of_week columns for the new table
data_posts_days <- data_posts_days %>%
select(id, created_at, day_of_week)
# Create column hour from created_at
data_posts_days$created_hour <- as.numeric(format(data_posts_days$created_at, "%H"))
print(data_posts_days)
# Grouping by id and day_of_week, then counting the number of posts
number_posts_days <- data_posts_days %>%
group_by(id, day_of_week) %>%
summarise(count = n())
`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
# Grouping by id, day_of_week and day created at, then counting th enumber of tweets
number_posts_per_day <- data_posts_days %>%
mutate(created_date = as.Date(created_at)) %>%
group_by(id, day_of_week, created_date) %>%
summarize(count = n())
`summarise()` has grouped output by 'id', 'day_of_week'. You can override using the `.groups` argument.
# Finding for each HEI the average count of posts per day
average_number_posts_per_day <- number_posts_per_day %>%
group_by(id, day_of_week) %>%
summarise(average_count = round(mean(count), 2))
`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
print(number_posts_days)
# Finding the HEI with the lowest count of posts per day
lowest_count <- number_posts_days %>%
group_by(day_of_week) %>%
slice_min(order_by = count) %>%
select(day_of_week, id, count)
# Finding the HEI with the highest count of posts per day
highest_count <- number_posts_days %>%
group_by(day_of_week) %>%
slice_max(order_by = count) %>%
select(day_of_week, id, count)
# Combine the results
high_low_HEI <- bind_rows(lowest_count, highest_count) %>%
arrange(day_of_week)
print(high_low_HEI)
ggplot(high_low_HEI, aes(x = day_of_week, y = count, fill = id)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = count),
position = position_dodge(width = 0.9),
vjust = -0.5,
size = 3) +
labs(title = "Highest and Lowest Count of Posts per Day for Each Day of the Week",
x = "Day of the Week", y = "Count") +
scale_fill_manual(values = rainbow(length(unique(high_low_HEI$id)))) +
theme_minimal() +
theme(legend.title = element_blank())
# Finding the HEI with lowest and highest averaged count of posts per day
high_low_average_HEIs <- average_number_posts_per_day %>%
group_by(day_of_week) %>%
filter(average_count == max(average_count) | average_count == min(average_count)) %>%
arrange(day_of_week, ifelse(average_count == min(average_count), average_count, -average_count))
print(high_low_average_HEIs)
ggplot(high_low_average_HEIs, aes(x = day_of_week, y = average_count, fill = id)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = round(average_count, 2)),
position = position_dodge(width = 0.7),
vjust = -0.5,
size = 3) +
labs(title = "Highest and Lowest Average Count of Posts per Day for Each Day of the Week",
x = "Day of the Week", y = "Average Count") +
scale_fill_manual(values = rainbow(length(unique(high_low_HEI$id)))) +
theme_minimal() +
theme(legend.title = element_blank())
favourite_day_hei <- number_posts_days %>%
group_by(id) %>%
top_n(1, count) %>%
arrange(id)
print(favourite_day_hei)
number_posts_hours <- data_posts_days %>%
group_by(id, created_hour) %>%
summarise(count = n()) %>%
ungroup()
`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
favourite_hour_hei <- number_posts_hours %>%
group_by(id) %>%
top_n(1, count) %>%
arrange(id)
print(favourite_hour_hei)
heatmap_maker <- function(target_id){
# Filtering data for the specific HEI
target_data <- data_posts_days %>%
filter(id == target_id)
# Grouping by day of the week and hour, and counting the number of tweets
tweet_counts <- target_data %>%
group_by(day_of_week, created_hour) %>%
summarise(num_posts = n())
# Plotting heatmap
ggplot(tweet_counts, aes(x = day_of_week, y = created_hour, fill = num_posts)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = paste("Post Heatmap for", target_id),
x = "Day of the week",
y = "Hour of the day")
}
heatmap_maker("duke.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("epfl.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("goe.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("harvard.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("leicester.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("manchester.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("mit.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("sb.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("stanford.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("trinity.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("wv.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
heatmap_maker("yale.csv")
`summarise()` has grouped output by 'day_of_week'. You can override using the `.groups` argument.
barplot(hashtags$hashtag_percentage,
names.arg = hashtags$id,
main = "Hashtags Percentage for Each HEI",
xlab = "HEI",
ylab = "Hashtags Percentage",
ylim = c(0, max(hashtags$hashtag_percentage) + 30),
las = 2,
col= "#F1C40F")
text(x = barplot(hashtags$hashtag_percentage, plot = FALSE),
y = hashtags$hashtag_percentage,
labels = round(hashtags$hashtag_percentage, 2),
pos = 3)
# Transforming empty strings into NA
data_posts$urls[data_posts$urls == ""] <- NA
# Table with number of post, number of NA and url percentage of usage
url_usage <- data_posts %>%
group_by(id) %>%
summarise(count = n(),
na = sum(is.na(urls)),
url_percentage = round(((count - na) / count * 100), 2))
print(url_usage)
barplot(url_usage$url_percentage,
names.arg = url_usage$id,
main = "Urls Percentage for Each HEI",
xlab = "HEI",
ylab = "Urls Percentage",
ylim = c(0, max(url_usage$url_percentage) + 10),
las = 2,
col= "#8E44AD")
text(x = barplot(url_usage$url_percentage, plot = FALSE),
y = url_usage$url_percentage,
labels = round(url_usage$url_percentage, 2),
pos = 3)
data_posts_content <- data_posts %>%
select(id, text)
# Counting number of words
data_posts_content <- data_posts_content %>%
mutate(num_words = lengths(strsplit(text, "\\s+")))
# Grouping by HEI and calculate average, minimum, and maximum values of number of words
data_posts_content_metrics <- data_posts_content %>%
group_by(id) %>%
summarise(average_num_words = mean(num_words),
min_num_words = min(num_words),
max_num_words = max(num_words))
print(data_posts_content_metrics)
ggplot(data_posts_content_metrics, aes(x = id, y = average_num_words)) +
geom_point(aes(color = "Average")) +
geom_errorbar(aes(ymin = min_num_words, ymax = max_num_words, color = "Range"), width = 0.2) +
scale_color_manual(values = c("Average" = "#1976D2", "Range" = "#EF5350")) +
labs(title = "Word Count Summary by HEI",
x = "HEI",
y = "Number of Words",
color = "Metric") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Creating table for cluster algorithms
# Joining attribute percentage_tweets (percentage of tweets out of all posts) and percentage_replies (percentage of replies out of all posts) from number_posts also adding unique_hashtags (number of unique hashtags) and hashtag_percentage (percentage of posts that contain a hashtag) from hashtags, per HEI
cluster_table <- merge(select(hashtags, id, unique_hashtags, hashtag_percentage), select(data_ratio, id, percentage_tweets, percentage_replies), by = "id", all=TRUE)
# Joining attribute avg_posts_per_days (average of posts per day) from posts_per_day per HEI
cluster_table <- merge(cluster_table, select(posts_per_day, id, avg_posts_per_days), by = "id", all=TRUE)
# Joining attribute avg_posts_per_weeks (average of posts per week) from posts_per_week per HEI
cluster_table <- merge(cluster_table, select(posts_per_week, id, avg_posts_per_weeks), by = "id", all=TRUE)
# Joining attribute avg_posts_in_academic_time (average of posts during academic time) from data_posts_academic per HEI
cluster_table <- merge(cluster_table, select(data_posts_academic, id, avg_posts_in_academic_time), by = "id", all=TRUE)
# Joining attribute avg_posts_in_vacation_time (average of posts during vacation time) from data_posts_vacations per HEI
cluster_table <- merge(cluster_table, select(data_posts_vacations, id, avg_posts_in_vacation_time), by = "id", all=TRUE)
# Joining attribute created_hour (hour where every HEI made more posts) from favourite_hour_hei per HEI
cluster_table <- merge(cluster_table, select(favourite_hour_hei, id, created_hour), by = "id", all=TRUE)
# Joining attribute url_percentage (percentage of posts that contain an url) from url_usage per HEI
cluster_table <- merge(cluster_table, select(url_usage, id, url_percentage), by = "id", all=TRUE)
# Joining attribute average_num_words (average number of words in the posts) from data_posts_content_metrics per HEI
cluster_table <- merge(cluster_table, select(data_posts_content_metrics, id, average_num_words), by = "id", all=TRUE)
print(cluster_table)
cluster_maker <- function(num_clusters, table){
# Excluding id column for clustering
cluster_data <- select(table, -id)
# Scaling the data for kmeans method
scaled_data <- scale(cluster_data)
kmeans_model <- kmeans(scaled_data, centers = num_clusters, nstart = 10)
# Extract cluster assignments
cluster_assignments <- kmeans_model$cluster
# Create a data frame combining original data with cluster assignments
clustered_data <- cbind(cluster_table$id, cluster_data, cluster = cluster_assignments)
clustered_data <- clustered_data[, c("cluster_table$id", "cluster")]
print(clustered_data)
}
elbow_maker <- function(table){
cluster_data <- select(table, -id)
scaled_data <- scale(cluster_data)
wss <- vector()
range <- 1:10
for (k in range) {
kmeans_model <- kmeans(scaled_data, centers = k, nstart = 10)
wss[k] <- kmeans_model$tot.withinss
}
elbow_df <- data.frame(k = range, WSS = wss)
ggplot(elbow_df, aes(x = k, y = WSS)) +
geom_line() +
geom_point() +
labs(x = "Number of Clusters", y = "Within-Cluster Sum of Squares (WCSS)",
title = "Elbow Method for Optimal k") +
theme_minimal()
}
elbow_maker(cluster_table)
cluster_maker(3, cluster_table)
cleanup <- function(docs, spec.words=NULL){
# lowercase
docs <- tm_map(docs, content_transformer(tolower))
# rm numbers
docs <- tm_map(docs, removeNumbers)
# rm english common stopWords
docs <- tm_map(docs, removeWords, stopwords("english"))
# if stopwords are specified as a character vector
if(!is.null(spec.words))
docs <- tm_map(docs, removeWords, spec.words)
# rm punctuations
docs <- tm_map(docs, removePunctuation)
# rm extra white spaces
docs <- tm_map(docs, stripWhitespace)
# lemmatizing text
docs <- tm_map(docs, lemmatize_words)
docs
}
pairing <- function(word_dict, paired_words) {
if (nrow(word_dict) != length(paired_words)) {
stop("The number of rows in word_dict and the length of paired_words should be equal.")
}
word_dict <- tibble::tibble(word = word_dict$value)
result <- tibble::tibble(word = word_dict$word, category = paired_words)
return(result)
}
classify_text <- function(text, word_pair) {
# Tokenizing the text and converting to lowercase
words <- tolower(unlist(strsplit(text, "\\W+")))
# Finding the frequent words in the text
freq_words <- words[words %in% word_pair$word]
if (length(freq_words) == 0) {
return("Unknown") # Returning Unknown if no frequent words are found
}
# Getting the corresponding categories
categories <- word_pair$category[word_pair$word %in% freq_words]
# Sorting categories by frequency in descending order and return the most frequent one
return(names(sort(table(categories), decreasing = TRUE))[1])
}
corpus_maker <- function(text) {
texts <- text$text
vc <- VectorSource(texts)
corpus <- Corpus(vc)
return(corpus)
}
freq_terms <- function(clean_text, number) {
dtm <- DocumentTermMatrix(clean_text)
dtm.tfidf <- weightTfIdf(dtm)
mdf <- as_tibble(as.matrix(dtm.tfidf))
mdf.freq <- mdf %>%
select(findFreqTerms(dtm, number)) %>%
summarise_all(sum) %>%
gather() %>%
arrange(desc(value))
mdf.freq$key <-
factor(mdf.freq$key,
levels = mdf.freq$key[order(mdf.freq$value)])
word_dictionary <- as_tibble(mdf.freq$key)
ggplot(mdf.freq, aes(x=key, y=value)) +
geom_bar(stat="identity") +
labs(x="terms", y="freq") + coord_flip()
return(word_dictionary)
}
duke_text <- subset(data_posts_content, id == "duke.csv")
duke_corpus <- corpus_maker(duke_text)
duke_clean_text <-cleanup(duke_corpus, c("new", "will", "change", "north", "can", "first", "year", "carolina", "years", "summer", "meet", "one"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
duke_word_dictionary <- freq_terms(duke_clean_text, 30)
duke_word_category <- c("Image", "Education", "Education", "Image", "Image", "Research", "Research", "Image", "Image", "Engagement", "Research", "Research", "Image", "Education", "Education", "Society", "Education", "Engagement")
duke_word_pair <- pairing(duke_word_dictionary, duke_word_category)
print(duke_word_pair)
# Applying the classify_text function to each text
duke_text <- duke_text %>%
mutate(category = sapply(text, classify_text, word_pair = duke_word_pair))
print(duke_text)
epfl_text <- subset(data_posts_content, id == "epfl.csv")
epfl_corpus <- corpus_maker(epfl_text)
epfl_clean_text <- cleanup(epfl_corpus, c("new", "amp", "can", "will", "one", "now", "–", "via", "portrait"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
epfl_word_dictionary <- freq_terms(epfl_clean_text, 30)
epfl_word_category <- c("Image", "Research", "Image", "Research", "Research", "Research", "Education", "Image", "Research", "Research", "Education", "Research", NA, "Education", "Education")
epfl_word_pair <- pairing(epfl_word_dictionary, epfl_word_category)
print(epfl_word_pair)
# Applying the classify_text function to each text
epfl_text <- epfl_text %>%
mutate(category = sapply(text, classify_text, word_pair = epfl_word_pair))
print(epfl_text)
goe_text <- subset(data_posts_content, id == "goe.csv")
goe_corpus <- corpus_maker(goe_text)
goe_clean_text <- cleanup(goe_corpus, c("can", "new", "amp", "will"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
goe_word_dictionary <- freq_terms(goe_clean_text, 15)
goe_word_category <- c("Research", "Research", "Image", "Education", "Engagement", "Image", "Engagement", "Education")
goe_word_pair <- pairing(goe_word_dictionary, goe_word_category)
print(goe_word_pair)
# Applying the classify_text function to each text
goe_text <- goe_text %>%
mutate(category = sapply(text, classify_text, word_pair = goe_word_pair))
print(goe_text)
harvard_text <- subset(data_posts_content, id == "harvard.csv")
harvard_corpus <- corpus_maker(harvard_text)
harvard_clean_text <- cleanup(harvard_corpus, c("new", "can", "will", "summer", "year", "first", "may", "-", "recent", "years", "one", "said", "time", "many", "world", "change"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
harvard_word_dictionary <- freq_terms(harvard_clean_text, 50)
harvard_word_category <- c("Image", "Education", "Research", "Image", "Research", "Research", "Engagement", "Education", "Society", "Education", "Society", "Research", "Education", "Image", "Research", NA, "Research", "Education", "Education", NA, "Research", "Society", "Society", "Research", "Education")
harvard_word_pair <- pairing(harvard_word_dictionary, harvard_word_category)
print(harvard_word_pair)
harvard_text <- harvard_text %>%
mutate(category = sapply(text, classify_text, word_pair = harvard_word_pair))
print(harvard_text)
leicester_text <- subset(data_posts_content, id == "leicester.csv")
leicester_corpus <- corpus_maker(leicester_text)
leicester_clean_text <- cleanup(leicester_corpus, c("👉", "day", "new", "clear", "year", "can", "will", "time", "space", "one", "first"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
leicester_word_dictionary <- freq_terms(leicester_clean_text, 60)
leicester_word_category <- c("Image", NA, "Image", "Engagement", "Society", "Education", "Education", "Research", "Engagement", "Engagement", "Image", "Engagement", "Education", "Engamement", "Engagement", "Education", "Image", "Engagement", "Education", NA, "Image")
leicester_word_pair <- pairing(leicester_word_dictionary, leicester_word_category)
print(leicester_word_pair)
leicester_text <- leicester_text %>%
mutate(category = sapply(text, classify_text, word_pair = leicester_word_pair))
print(leicester_text)
manchester_text <- subset(data_posts_content, id == "manchester.csv")
manchester_corpus <- corpus_maker(manchester_text)
manchester_clean_text <- cleanup(manchester_corpus, c("👇", "can", "just", "will", "get", "well", "one", "help", "now", "new", "read", "congratulations"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
manchester_word_dictionary <- freq_terms(manchester_clean_text, 50)
manchester_word_category <- c(NA, "Image", "Engagement", "Image", "Education", "Education", "Engagement", "Image", "Education", "Society", "Research", "Research", "Education", "Education", "Education", "Engagement", "Research", "Education", "Research")
manchester_word_pair <- pairing(manchester_word_dictionary, manchester_word_category)
print(manchester_word_pair)
manchester_text <- manchester_text %>%
mutate(category = sapply(text, classify_text, word_pair = manchester_word_pair))
print(manchester_text)
mit_text <- subset(data_posts_content, id == "mit.csv")
mit_corpus <- corpus_maker(mit_text)
mit_clean_text <- cleanup(mit_corpus, c("new", "can", "says", "“", "’", "—", "", "may", "first", "will", "using", "way", "one", "science"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
mit_word_dictionary <- freq_terms(mit_clean_text, 40)
mit_word_category <- c("Image", "Research", NA, "Image", NA, "Education", "Research", NA, "Education", "Research", "Research", "Education", "Research", "Research", "Image", "Education", "Education", "Research", "Research", "Research", "Research", "Research", "Research", "Research", "Research", "Society")
mit_word_pair <- pairing(mit_word_dictionary, mit_word_category)
print(mit_word_pair)
mit_text <- mit_text %>%
mutate(category = sapply(text, classify_text, word_pair = mit_word_pair))
print(mit_text)
sb_text <- subset(data_posts_content, id == "sb.csv")
sb_corpus <- corpus_maker(sb_text)
sb_clean_text <- cleanup(sb_corpus, c("new", "—", "will", "week", "can", "amp", "via", "now", "future", "first"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
sb_word_dictionary <- freq_terms(sb_clean_text, 30)
sb_word_category <- c("Image", "Research", "Image", "Image", "Image", NA, "Research", "Image", "Image", "Research", "Education", "Image", "Education", "Image", "Education", "Image", "Society")
sb_word_pair <- pairing(sb_word_dictionary, sb_word_category)
print(sb_word_pair)
sb_text <- sb_text %>%
mutate(category = sapply(text, classify_text, word_pair = sb_word_pair))
print(sb_text)
stanford_text <- subset(data_posts_content, id == "stanford.csv")
stanford_corpus <- corpus_maker(stanford_text)
stanford_clean_text <- cleanup(stanford_corpus, c("new", "will"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
stanford_word_dictionary <- freq_terms(stanford_clean_text, 25)
stanford_word_category <- c("Image", "Image", "Research", "Education", NA)
stanford_word_pair <- pairing(stanford_word_dictionary, stanford_word_category)
print(stanford_word_pair)
stanford_text <- stanford_text %>%
mutate(category = sapply(text, classify_text, word_pair = stanford_word_pair))
print(stanford_text)
trinity_text <- subset(data_posts_content, id == "trinity.csv")
trinity_corpus <- corpus_maker(trinity_text)
trinity_clean_text <- cleanup(trinity_corpus, c("amp", "read", "new", "can", "will", "week", "work", "great", "day", "visit", "irish", "first", "led", "congratulations"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
trinity_word_dictionary <- freq_terms(trinity_clean_text, 40)
Warning: empty document(s): 754
trinity_word_category <- c("Image", "Education", "Research", "Image", "Research", "Image", "Research", "Society", "Society", "Education", "Engagement", "Engagement", "Education", "Image", "Education", "Image", "Research")
trinity_word_pair <- pairing(trinity_word_dictionary, trinity_word_category)
print(trinity_word_pair)
trinity_text <- trinity_text %>%
mutate(category = sapply(text, classify_text, word_pair = trinity_word_pair))
print(trinity_text)
wv_text <- subset(data_posts_content, id == "wv.csv")
wv_corpus <- corpus_maker(wv_text)
wv_clean_text <- cleanup(wv_corpus, c("💛💙", "🙌", "👉", "happy", "day", "see", "week", "great", "can", "will", "well", "know", "new", "now", "get", "just"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
wv_word_dictionary <- freq_terms(wv_clean_text, 40)
wv_word_category <- c(NA, "Image", "Image", NA, "Image", "Image", "Engagement", "Education", "Education", "Education", NA)
wv_word_pair <- pairing(wv_word_dictionary, wv_word_category)
print(wv_word_pair)
wv_text <- wv_text %>%
mutate(category = sapply(text, classify_text, word_pair = wv_word_pair))
print(wv_text)
yale_text <- subset(data_posts_content, id == "yale.csv")
yale_corpus <- corpus_maker(yale_text)
yale_clean_text <- cleanup(yale_corpus, c("new", "—", "will", "can", "'", "first", "work", "read", "help", "year"))
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
yale_word_dictionary <- freq_terms(yale_clean_text, 70)
yale_word_category <- c("Research", "Education", "Education", NA, "Research", "Research", "Research", "Research", "Education", "Image", "Research", "Education", "Image", "Research", NA, "Research", "Image", "Image", "Research", "Image", "Research", "Society", "Research", "Society", "Society")
yale_word_pair <- pairing(yale_word_dictionary, yale_word_category)
print(yale_word_pair)
yale_text <- yale_text %>%
mutate(category = sapply(text, classify_text, word_pair = yale_word_pair))
print(yale_text)
emotions_maker <- function(texts, hei_name){
text_emotion <- get_nrc_sentiment(texts)
# Proportions for text values
emotion_proportions <- colSums(prop.table(text_emotion[, 1:8]))
# Visualization of percentages on each emotion found on posts
barplot <- barplot(
sort(colSums(prop.table(text_emotion[, 1:8]))),
horiz = TRUE,
cex.names = 0.7,
las = 1,
main = sprintf("Emotions found in %s's texts", hei_name),
xlab="Percentage",
xlim = c(0, max(emotion_proportions) * 1.2)
)
text(
x = sort(emotion_proportions),
y = barplot,
labels = sprintf("%.2f%%", 100 * sort(emotion_proportions)),
pos = 4,
cex = 0.7
)
}
duke_text$sentiment <- round(get_sentiment(duke_text$text), 2)
print(duke_text$sentiment)
[1] 0.00 1.00 0.85 -0.50 0.75 0.50 1.50 2.65 0.75 1.75 -1.00 1.30 1.45 1.50 1.60 2.00 0.35 2.45 1.85 0.50 1.45 0.90 1.05 1.65 -0.75
[26] -0.50 1.00 -0.80 2.80 1.50 3.20 2.30 1.00 4.30 -0.75 2.15 0.50 2.75 -0.20 0.50 0.60 1.90 1.20 -0.75 1.25 0.10 1.95 1.95 0.00 2.10
[51] 0.60 -1.00 0.60 -0.25 1.00 1.15 2.95 2.10 1.30 1.05 0.00 0.25 0.80 0.25 0.25 0.00 1.35 1.80 1.60 0.60 1.90 0.00 3.20 1.80 0.00
[76] 0.40 2.05 1.40 1.60 0.20 0.40 -0.40 1.00 2.60 1.65 0.50 0.40 0.75 2.45 0.00 2.45 2.25 0.75 1.10 1.30 0.85 1.95 -1.60 1.10 1.40
[101] 2.05 1.70 3.05 1.45 3.20 0.00 2.15 0.60 0.75 2.75 1.35 4.60 1.70 2.25 1.60 0.00 0.00 1.55 0.00 2.10 4.50 3.05 1.10 2.50 1.00
[126] 0.50 0.75 0.15 1.90 4.60 -0.25 1.15 1.10 1.60 1.10 1.55 0.25 2.70 0.40 0.40 1.20 -0.85 0.80 0.35 0.35 0.00 -1.00 3.30 2.25 2.05
[151] 1.55 -0.60 3.10 0.65 -0.15 1.80 0.25 1.45 2.60 1.95 2.90 2.95 0.50 2.30 1.75 -0.50 2.40 1.00 1.20 2.50 3.05 1.75 1.75 2.00 0.80
[176] 2.80 0.40 1.35 0.35 1.65 3.00 1.35 0.00 1.10 0.60 1.95 2.65 1.40 -1.50 0.25 2.10 0.00 1.70 0.05 2.40 0.50 0.85 0.10 2.10 1.20
[201] 0.40 0.85 2.05 0.35 1.50 1.80 0.60 2.90 0.25 1.10 0.50 1.20 0.50 1.65 -0.70 0.00 2.30 2.30 0.00 0.40 0.70 0.15 -0.50 3.45 1.30
[226] 0.25 3.55 0.10 2.05 1.05 2.15 0.10 0.80 0.00 0.30 0.75 0.90 3.95 0.60 -0.15 1.40 1.80 2.30 0.00 1.60 2.05 0.90 2.35 2.00 -0.50
[251] 0.30 0.00 0.60 0.50 -0.75 0.75 0.30 2.60 -0.10 0.60 3.05 3.60 2.10 1.00 1.40 3.30 2.10 0.75 1.75 2.10 1.50 3.80 3.80 0.40 3.20
[276] 2.40 1.00 0.65 0.75 0.00 0.75 1.55 0.25 0.85 2.55 1.45 0.10 -0.30 0.10 3.05 5.25 1.55 0.15 0.50 -0.50 1.60 2.80 -2.90 -0.25 2.60
[301] 3.15 -0.50 0.40 1.60 -0.25 0.00 0.50 0.55 1.80 2.70 0.00 1.20 -0.50 0.75 1.50 2.50 0.80 0.75 1.80 -0.75 1.35 0.50 0.00 0.25 3.80
[326] 0.50 1.00 2.95 1.40 1.45 1.60 0.75 0.85 3.05 1.30 2.90 0.60 0.10 0.10 1.60 -0.25 -1.75 -1.00 1.55 0.45 0.00 1.10 0.75 0.40 0.00
[351] 0.05 1.30 1.30 0.25 -1.35 0.85 0.00 2.25 0.05 3.15 0.40 0.70 3.40 0.80 3.10 0.25 3.70 0.15 0.25 0.60 0.00 -1.65 0.50 0.65 0.40
[376] 0.80 2.40 0.80 -0.05 0.00 2.35 2.15 0.80 1.80 0.25 2.70 1.05 -1.10 0.50 4.00 0.25 0.80 0.50 0.75 2.15 0.80 0.05 3.55 1.70 0.85
[401] 0.50 1.00 -0.15 0.70 0.00 4.45 0.00 -0.60 3.95 1.15 1.25 4.10 3.00 0.35 3.45 1.60 1.30 2.15 0.80 -0.25 0.50 1.15 0.35 -1.25 1.55
[426] 2.00 1.60 -0.75 2.35 0.85 3.90 0.60 1.10 1.25 1.40 -0.10 4.10 1.30 1.65 1.45 -1.20 1.10 1.45 2.45 0.70 0.80 -2.30 3.65 2.65 1.25
[451] 0.55 3.30 0.95 0.80 2.25 5.15 0.25 1.50 1.90 2.05 0.00 4.10 -2.00 0.50 1.60 0.75 1.60 0.40 2.05 1.55 1.15 0.85 0.80 1.40 3.10
[476] -1.10 -0.25 1.45 2.00 2.05 0.40 4.50 -0.05 1.20 3.20 0.20 4.50 2.50 3.00 1.05 1.30 1.80 0.40 2.10 1.75 -0.50 1.55 3.85 1.20 2.50
[501] 1.90 -0.70 1.50 3.15 1.20 2.10 1.80 1.35 -1.05 -1.00 0.90 -2.05 -0.30 0.00 0.80 1.30 0.20 2.10 0.80 -0.10 2.20 1.55 -1.80 0.80 1.75
[526] 1.35 1.80 -2.10 1.45 0.00 0.75 0.25 0.30 1.95 0.00 0.50 3.75 1.15 1.95 1.00 -0.85 1.05 1.05 0.25 3.50 0.90 2.00 -0.50 1.00 0.55
[551] 1.00 0.60 1.55 0.45 0.45 2.65 0.80 0.40 0.25 0.00 0.25 -0.55 1.85 1.25 0.25 0.80 0.25 0.00 0.95 1.60 2.35 2.35 1.30 0.45 0.60
[576] 0.85 1.10 1.55 1.60 4.40 3.95 0.05 1.70 3.65 0.55 -0.35 1.25 1.30 0.00 0.50 1.05 0.80 -3.40 -0.10 1.00 1.55 2.05 2.95 0.85 -0.25
[601] 0.00 0.80 1.00 0.25 -0.50 -3.05 -0.50 -0.25 2.40 -1.10 4.55 1.25 1.40 -1.15 2.55 2.20 0.80 0.45 1.90 0.15 0.75 0.00 0.50 -0.25 2.55
[626] 0.00 -1.55 1.25 -0.75 0.60 0.30 0.00 2.05 0.45 2.95 3.20 1.45 0.00 1.55 -0.55 1.00 0.00 2.55 1.00 -1.75 -0.25 0.65 0.50 -0.35 2.85
[651] -0.80 0.50 0.50 -0.25 0.15 3.85 1.15 1.25 1.90 2.15 4.05 1.00 0.05 0.95 1.90 1.30 1.35 1.75 1.10 1.30 1.90 -1.00 0.00 1.75 1.65
[676] -1.25 2.50 -0.25 0.25 3.65 2.85 1.90 2.90 1.85 1.75 1.40 0.60 3.35 2.65 0.50
emotions_maker(duke_text$text, "duke")
epfl_text$sentiment <- round(get_sentiment(epfl_text$text), 2)
print(epfl_text$sentiment)
[1] -1.75 0.80 2.75 1.55 1.55 3.10 1.00 3.10 1.00 1.40 0.00 0.65 2.20 0.50 1.00 1.25 0.50 2.30 2.45 1.65 -1.05 0.65 2.10 1.60 3.15
[26] 0.35 1.40 0.80 -1.75 0.50 2.35 -1.05 0.35 0.80 0.85 0.00 -0.15 -0.50 1.85 3.00 1.30 0.80 -0.75 2.55 0.00 0.95 1.50 2.75 1.20 0.00
[51] 0.70 -0.95 1.50 3.00 1.00 2.20 1.55 3.00 -1.60 0.40 0.90 -0.65 1.35 2.60 1.05 0.90 2.50 2.10 2.05 -0.25 4.50 3.35 1.80 1.70 2.85
[76] 1.05 -0.25 -0.40 3.00 0.85 1.60 1.90 1.85 3.25 0.15 0.00 -0.05 0.95 1.45 2.20 -0.70 2.70 1.05 0.70 1.35 2.45 1.55 3.55 3.65 1.15
[101] 0.50 0.65 1.10 1.05 -0.35 3.05 1.80 1.00 1.60 0.85 1.20 3.80 5.50 -1.80 2.60 3.25 0.90 0.80 2.05 3.50 2.05 2.20 0.10 0.00 2.20
[126] 2.35 -0.65 1.35 -0.50 0.90 0.95 2.80 1.10 2.45 1.15 0.70 1.50 -0.20 0.50 1.65 -1.20 1.35 2.95 1.40 0.00 1.50 1.85 0.40 2.10 1.45
[151] 0.00 0.50 -1.00 1.00 1.65 1.90 1.00 0.75 2.25 0.00 0.30 1.55 2.65 1.35 0.75 0.00 1.90 0.30 2.50 1.85 0.85 0.80 1.90 3.25 1.90
[176] 2.55 0.60 0.00 0.40 2.30 2.45 1.05 1.10 3.15 1.65 1.20 0.10 1.95 3.30 0.25 3.00 2.05 2.10 0.35 2.40 0.60 -0.65 2.45 0.40 2.40
[201] 0.80 1.40 3.95 0.30 1.40 0.40 0.40 3.35 2.10 2.05 2.35 1.25 0.95 4.60 2.00 1.90 0.80 1.95 1.55 0.00 0.20 1.30 5.05 0.00 1.20
[226] 1.40 0.75 1.10 0.95 3.80 1.50 2.40 2.45 2.55 1.65 1.90 -0.50 1.50 3.45 2.65 1.55 0.75 1.45 1.55 1.90 2.75 1.20 1.80 1.95 3.55
[251] 1.05 -0.35 2.65 2.15 0.75 2.20 2.00 1.50 2.05 5.65 4.00 2.05 -0.15 2.10 0.90 3.00 0.60 0.00 1.40 3.90 3.00 2.35 0.25 2.65 1.75
[276] 2.50 3.40 -2.75 1.55 1.00 1.00 1.35 0.50 2.35 1.80 2.50 0.65 0.55 2.55 2.85 0.10 0.25 -0.40 1.55 -0.40 0.25 2.60 4.45 0.50 1.25
[301] 2.30 1.80 -1.05 1.15 1.25 1.20 2.75 0.35 2.20 1.00 0.30 0.60 1.55 0.40 -0.15 2.00 2.00 0.70 0.50 0.60 2.00 0.60 1.35 3.65 1.55
[326] 0.65 -1.75 0.85 0.60 0.10 -0.25 1.90 0.25 4.30 0.30 1.10 0.55 0.05 0.00 -1.25 0.25 1.20 0.75 1.75 1.15 0.40 1.65 1.25 1.00 1.30
[351] 4.50 2.55 -0.40 0.80 1.05 2.95 1.00 2.60 1.20 3.35 2.60 2.50 3.65 1.95 2.40 3.50 1.90 2.45 2.45 0.50 1.15 2.15 4.10 1.15 1.80
[376] 2.75 1.95 2.10 2.25 3.80 -0.25 1.90 2.20 0.00 2.80 1.50 -2.10 1.40 0.40 1.05 1.15 1.50 0.00 1.75 1.10 1.90 1.55 -1.05 0.10 2.10
[401] 3.00 3.25 2.65 3.85 0.90 2.70 1.75 0.25 1.80 1.40 1.00 0.55 0.25 1.90 1.20 0.10 0.80 0.80 3.30 0.30 1.65 2.45 0.25 2.90 0.00
[426] 2.95 0.75 1.80 2.00 1.25 1.30 2.05 1.95 2.50 0.40 -0.60 0.85 1.05 0.25 1.35 1.85 0.80 3.30 0.50 1.90 0.55 0.85 1.90 0.60 1.05
[451] 1.05 2.90 -2.00 2.25 1.25 2.00 1.00 3.60 3.05 2.10 2.80 0.55 2.00 1.80 -0.40 3.75 2.45 1.25 1.80 -0.15 -0.30 -0.75 3.20 0.00 2.40
[476] 1.00 4.05 0.05 1.35 2.05 1.15 2.90 1.45 2.45 2.50 4.90 1.85 0.50 0.10 3.05 3.35 -0.05 1.25 2.95 1.30 2.10 2.55 1.55 4.45 4.80
[501] 3.70 0.60 3.25 0.00 1.30 1.50 2.80 0.10 1.00 0.40 3.00 0.50 2.80 2.35 2.65 2.50 2.95 1.70 0.00 0.25 3.60 1.40 2.15 1.25 0.10
[526] 2.90 1.70 3.90 1.15 -0.10 1.65 0.05 3.70 3.35 1.25 -0.90 0.50 1.00 0.65 1.75 1.50 1.75 1.75 1.30 0.25 1.80 0.75 3.20 -1.65 1.80
[551] -0.25 0.00 2.20 -0.30 0.60 0.00 1.55 0.80 1.45 2.65 0.05 0.80 0.90 1.45 0.75 2.65 1.15 0.65 0.75 2.45 1.00 0.75 0.15 -1.65 1.80
[576] 0.50 1.00 1.30 -1.75 1.45 0.75 0.40 3.00 1.50 0.80 3.90 -1.55 1.60 2.30 4.45 1.00 2.75 -0.75
emotions_maker(epfl_text$text, "epfl")
goe_text$sentiment <- round(get_sentiment(goe_text$text), 2)
print(goe_text)
emotions_maker(goe_text$text, "goe")
harvard_text$sentiment <- round(get_sentiment(harvard_text$text), 2)
print(harvard_text)
emotions_maker(harvard_text$text, "harvard")
leicester_text$sentiment <- round(get_sentiment(leicester_text$text), 2)
print(leicester_text)
emotions_maker(leicester_text$text, "leicester")
manchester_text$sentiment <- round(get_sentiment(manchester_text$text), 2)
print(manchester_text)
emotions_maker(manchester_text$text, "manchester")
mit_text$sentiment <- round(get_sentiment(mit_text$text), 2)
print(mit_text)
emotions_maker(mit_text$text, "mit")
sb_text$sentiment <- round(get_sentiment(sb_text$text), 2)
print(sb_text)
emotions_maker(sb_text$text, "sb")
stanford_text$sentiment <- round(get_sentiment(stanford_text$text), 2)
print(stanford_text)
emotions_maker(stanford_text$text, "stanford")
trinity_text$sentiment <- round(get_sentiment(trinity_text$text), 2)
print(trinity_text)
emotions_maker(trinity_text$text, "trinity")
wv_text$sentiment <- round(get_sentiment(wv_text$text), 2)
print(wv_text)
emotions_maker(wv_text$text, "wv")
yale_text$sentiment <- round(get_sentiment(yale_text$text), 2)
print(yale_text)
emotions_maker(yale_text$text, "yale")